home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Collection of Tools & Utilities
/
Collection of Tools and Utilities.iso
/
tex
/
gestalt.zip
/
SIMIL_A.ASM
< prev
next >
Wrap
Assembly Source File
|
1988-12-06
|
13KB
|
339 lines
;From the article "Pattern Matching - The Gestalt Approach"
;by John W. Ratcliff and David E. Metzener
;Dr. Dobbs Journal, July 1988
; This version is for use in assembly language programs.
; See TESTSIM.ASM for a demonstration.
;
; - All required local variables are right here in this procedure.
; - Enter with DS:SI = pointer to string 1,
; DS:DI = pointer to string 2
; Preserves all registers except AX (which returns the similarity)
; It is CRITICAL that SS=CSeg! Else the bp referencing will not work!
; I have no intentions of hacking it for other configurations ..
; that's left as an exercise for the student.
; (Hint: Look in the Turbo Pascal version for an example.)
;
; David Kirschbaum
; Toad Hall
; kirsch@braggvax.ARPA
;TITLE SIMIL.ASM written by John W. Ratcliff and David E. Metzener
; November 10, 1987
; Uses the Ratcliff/Obershelp pattern recognition algorithm.
; The function SIMIL returns a percentage value corresponding to how
; alike any two strings are. Be certain to upper case the two strings
; passed if you are not concerned about case sensitivity.
_Simil_a proc near
;TH Data moved down here so it'll be completely contained in our procedure.
ststr1L dw 25 dup(?) ;contains lefts for string 1
ststr1R dw 25 dup(?) ;contains rights for string 1
ststr2L dw 25 dup(?) ;contains lefts for string 2
ststr2R dw 25 dup(?) ;contains rights for string 2
stcknum dw ? ;number of elements on the stack
score dw ? ;the # of chars in common times 2
total dw ? ;total # of chars in string 1 and 2
cL1 dw ? ;left of string 1 found in common
cR1 dw ? ;right of string 1 found in common
cL2 dw ? ;left of string 2 found in common
cR2 dw ? ;right of string 2 found in common
s2ed dw ? ;the end of string 2 used in compare
_simil:
; This routine expects pointers passed in two character strings, null
; terminated, that you wish compared. It returns a percentage value
; from 0 to 100% corresponding to how alike the two strings are.
; Usage: DS:SI = pointer to string 1 TH
; DS:DI = pointer to string 2 TH
; The similarity routine is composed of three major components
; pushst --- pushes a string section to be compared on the stack
; popst --- pops a string section to be examined off of the stack
; compare --- finds the largest group of characters in common between
; any two string sections
; The similarity routine begins by computing the total length of both
; strings passed and placing that value in TOTAL. It then takes
; the beginning and ending of both strings passed and pushes them on
; the stack. It then falls into the main line code.
; The original two strings are immediately popped off of the stack and
; are passed to the compare routine. The compare routine will find the
; largest group of characters in common between the two strings.
; The number of characters in common is multiplied times two and added
; to the total score. If there were no characters in common, then there
; is nothing to push onto the stack. If there are exactly one character
; to the left in both strings, then we needn't push it on the stack.
; (We already know they aren't equal from the previous call to compare.)
; Otherwise the characters to the left are pushed onto the stack. These
; same rules apply to characters to the right of the substring found in
; common. This process of pulling substrings off of the stack, comparing
; them, and pushing remaining sections on the stack is continued until
; the stack is empty. On return the total score is divided by the
; number of characters in both strings. This is multiplied times 100 to
; yield a percentage. This percentage similarity is returned to the
; calling procedure.
push bp ;save BP reg.
mov bp,sp ;save SP reg in BP for use in program
push bx ;save all regs we're gonna trash TH
push cx
push dx
push si
push di
push ES ;save the ES seg register
mov ax,DS ;copy DS segment reg to ES
mov ES,ax
xor ax,ax ;zero out AX for clearing of SCORE var
mov score,ax ;zero out SCORE
mov stcknum,ax ;initialize number of stack entries to 0
;TH DS:SI and DS:DI already point to string 1 and string 2 respectively.
;TH mov si,[bp+4] ;move beginning pointer of string 1 to SI
;TH mov di,[bp+6] ;move beginning pointer of string 2 to SI
cmp [si],al ;is it a null string?
je StrErr ;can't process null strings
cmp [di],al ;is it a null string?
jne DoCmp ;Neither is a null string, so process them
StrErr: jmp Donit ;exit routine
DoCmp: push di ;save DI because of SCAS opcode
push si ;save SI because of SCAS opcode
xor al,al ;clear out AL to search for end of string
cld ;set direction flag forward
mov cx,-1 ;make sure we repeat the correct # of times
repnz scasb ;scan for string delimiter in string 2
dec di ;point DI to '$00' byte of string 2
dec di ;point DI to last char of string 2
mov bp,di ;move DI to BP where it is supposed to be
pop di ;restore SI into DI for SCAS (string 1)
repnz scasb ;scan for string delimiter in string 1
not cx ;do one's complement for correct length of st
sub cx,2 ;subtract the two zero bytes at the end of st
mov total,cx ;store string 2's length
dec di ;point DI to '$00' byte of string 1
dec di ;point DI to last char of string 1
mov bx,di ;move DI to BX where it is supposed to be
pop di ;restore DI to what it should be
call PushSt ;push values for the first call to SIMILIARITY
Main: cmp stcknum,0 ;is there anything on the stack?
je Done ;no, then all done!
call PopSt ;get regs set up for a compare call
call Compare ;do compare for this substring set
or dx,dx ;if nothing in common then nothing to push TH
jz Main ;try another set TH
shl dx,1 ;*2 for add to score
add score,dx ;add into score
mov bp,stcknum ;get number of entry I want to look at
shl bp,1 ;get AX ready to access string stacks
mov si,[ststr1L+bp] ;move L1 into SI or L1
mov bx,cL1 ;move CL1 into BX or R1
mov di,[ststr2L+bp] ;move L2 into DI or L2
mov cx,cL2 ;move CL2 into CX temporarily
mov ax,[ststr1R+bp] ;get old R1 off of stack
mov cL1,ax ;place in CL1 temporarily
mov ax,[ststr2R+bp] ;get old R2 off of stack
mov cL2,ax ;save in CL2 temporarily
mov bp,cx ;place CL2 into BP
cmp bx,si ;compare CL1 to L1
je ChRght ;if zero, then nothing on left side string 1
cmp bp,di ;compare CL2 to L2
je ChRght ;if zero, then nothing on left side string 2
dec bx ;point to last part of left side string 1
dec bp ;point to last part of left side string 2
cmp bx,si ;only one char to examine?
jne PushIt ;no->we need to examine this
cmp bp,di ;only one char in both?
je ChRght ;nothing to look at if both only contain 1 char
PushIt: call PushSt ;push left side on stack
ChRght: mov si,cR1 ;move CR1 into SI or L1
mov bx,cL1 ;move R1 into BX or R1
mov di,cR2 ;move CR2 into DI or L2
mov bp,cL2 ;move R2 into BP or R2
cmp si,bx ;compare CR1 to R1
je Main ;If zero, then nothing on right side string 1
cmp di,bp ;compare CR2 to R2
je Main ;if zero, then nothing on right side string 2
inc si ;point to last part of right side string 1
inc di ;point to last part of right side string 2
cmp bx,si ;only one char to examine?
jne Push2 ;no-> examine it
cmp bp,di ;only 1 char to examine in both?
je Main ;yes-> get next string off of stack
Push2: call PushSt ;push right side on stack
jmp short Main ;do next level of compares
Done: mov ax,score ;get score into AX for MUL
or ax,ax ;anything? TH
jz Donit ;nope, forget this mess TH
mov cx,100 ;get 100 into CX for MUL
mul cx ;multiply by 100
mov cx,total ;get total chars for divide
div cx ;divide by total
Donit:
pop ES ;restore ES to entry value
pop di ;and all other regs also TH
pop si
pop dx
pop cx
pop bx
pop bp ;restore BP back to entry value
ret ;leave with AX holding % similarity
Compare:
; The compare routine locates the largest group of characters between string 1
; and string 2. This routine assumes t